# Clear memory
# NOTE(review): this removes every object in the environment the script is
# run in (the global environment for a top-level script). Attached packages
# and options are not reset by it.
rm(list = ls())

# Load packages
# Install pacman if it cannot be loaded, then use p_load() to load every
# package the script depends on.
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
  data.table, janitor, magrittr, fixest, 
  broom, tidyverse, tidylog, sf, maps, DBI, 
  tigris
)
# Set the tidylog display option to NULL.
options(tidylog.display = NULL)

# Infix complement of %in%: TRUE for left-hand values that are absent from the
# right-hand set, FALSE otherwise.
`%notin%` <- function(...) !`%in%`(...)

### fix mismatched fips in virginia

# mismatches between income/emp and maps
# Read the BEA CAINC5S table, normalise the column names, and keep only rows
# that carry an industry classification and whose GeoFIPS is not "00000"
# (presumably the national aggregate row -- TODO confirm).
county_income_industry <- data.table::fread(
  "raw/BEA/CAINC5S__ALL_AREAS_1969_2000.csv",
  fill = TRUE
) %>%
  tibble::as_tibble() %>%
  janitor::clean_names() %>%
  filter(
    geo_fips != "00000",
    industry_classification != "...",
    industry_classification != ""
  )

# 2020 cartographic-boundary county polygons from tigris, keyed by `fips`
# (the GEOID column). State FIPS codes 2, 11 and 15 and every code above 56
# are dropped; the result is projected to EPSG:5070.
counties <- tigris::counties(cb = TRUE, year = 2020) %>%
  sf::st_as_sf() %>%
  dplyr::rename(fips = GEOID) %>%
  mutate(STATEFP = as.numeric(STATEFP)) %>%
  filter(
    STATEFP <= 56,
    !(STATEFP %in% c(2, 11, 15))
  ) %>%
  sf::st_transform(crs = 5070)

# County polygons from the `maps` package with a 5-character `fips` key.
#
# The polygons (identified by `polyname`) are joined to maps::county.fips on
# `polyname`, the only column the two tables share. The key is now stated
# explicitly so the join cannot silently change if either table gains a column.
# A handful of codes are then patched by hand:
#   * 46113 is recoded to 46102;
#   * seven polygons get their code assigned directly by polyname, presumably
#     because county.fips lists them under a different spelling so the join
#     leaves them without a code -- TODO confirm.
# Codes are finally left-padded with zeros to 5 characters and the geometry is
# projected to EPSG:5070.
counties_sdf <- st_as_sf(maps::map("county", plot = FALSE, fill = TRUE)) %>%
  rename(polyname = ID) %>%
  full_join(maps::county.fips, by = "polyname") %>%
  mutate(fips = ifelse(fips == 46113, 46102, fips)) %>%
  mutate(fips = ifelse(polyname == "florida,okaloosa", 12091, fips)) %>%
  mutate(fips = ifelse(polyname == "virginia,accomack", 51001, fips)) %>%
  mutate(fips = ifelse(polyname == "texas,galveston", 48167, fips)) %>%
  mutate(fips = ifelse(polyname == "louisiana,st martin", 22099, fips)) %>%
  mutate(fips = ifelse(polyname == "north carolina,currituck", 37053, fips)) %>%
  mutate(fips = ifelse(polyname == "washington,pierce", 53053, fips)) %>%
  mutate(fips = ifelse(polyname == "washington,san juan", 53055, fips)) %>%
  mutate(fips = str_pad(fips, 5, "left", pad = "0")) %>%
  st_transform(5070)

# Virginia subsets of both sources: codes strictly between 51000 and 52000.

# BEA side: one row per GeoFIPS, with the key renamed to `fips`.
va_bea <- county_income_industry %>%
  distinct(geo_fips, .keep_all = TRUE) %>%
  rename(fips = geo_fips) %>%
  filter(as.numeric(fips) > 51000, as.numeric(fips) < 52000)

# Map side: the matching slice of the `maps` polygons.
va_maps <- counties_sdf %>%
  filter(as.numeric(fips) > 51000, as.numeric(fips) < 52000)

# Virginia codes that exist in only one of the two sources.
#
# After the full join on `fips`, a row with a missing `polyname` is a BEA code
# with no map polygon, and a row with a missing `geo_name` is a map polygon
# with no BEA code. BEA-only rows whose name contains "Independent" are
# discarded. The two remaining groups are then paired POSITIONALLY: the i-th
# BEA-only row is matched with the i-th maps-only row, so this step relies on
# both groups having the same size and the same ordering.
mismatch <- full_join(va_bea, va_maps, by = "fips") %>%
  dplyr::select(fips, polyname, geo_name) %>%
  filter(is.na(polyname) | is.na(geo_name)) %>%
  filter(!str_detect(geo_name, "Independent") | is.na(geo_name)) %>%
  rename(geo_fips = fips)

# The positional fill below would silently recycle or truncate values if the
# two groups differed in size, producing a wrong crosswalk; fail loudly instead.
if (
  sum(is.na(mismatch$polyname)) != sum(!is.na(mismatch$polyname)) ||
    sum(is.na(mismatch$geo_name)) != sum(!is.na(mismatch$geo_name))
) {
  stop(
    "Virginia FIPS mismatches do not pair one-to-one between BEA and maps",
    call. = FALSE
  )
}

# Give every BEA-only row the polyname of its maps-only partner ...
mismatch$polyname[is.na(mismatch$polyname)] <-
  mismatch$polyname[!is.na(mismatch$polyname)]
# ... and every maps-only row the BEA name of its partner. (The original
# right-hand side read from `polyname` here, which just copied each maps-only
# row's own polyname into `geo_name`.)
mismatch$geo_name[is.na(mismatch$geo_name)] <-
  mismatch$geo_name[!is.na(mismatch$geo_name)]

# Virginia crosswalk between map codes and BEA codes, one row per
# (polyname, fips_maps, fips_bea) combination. Polygons without an entry in
# `mismatch` keep their own code on the BEA side. The geometry column is
# dropped so the result is a plain tibble.
va_cross <- va_maps %>%
  left_join(mismatch) %>%
  mutate(geo_fips = coalesce(geo_fips, fips)) %>%
  mutate(fips_maps = fips, fips_bea = geo_fips) %>%
  as_tibble() %>%
  dplyr::select(-geom) %>%
  dplyr::select(polyname, fips_maps, fips_bea)

# fix bea fips codes
# Re-key the payroll file: the original (zero-padded) code is kept as
# `fips_bea`, and `fips` is replaced by the map code wherever the Virginia
# crosswalk supplies one. Only the first row per (fips, industry) is kept.
# NOTE(review): the join key is whatever columns the file shares with
# `va_cross` -- presumably just `fips_bea`; confirm against the file's header.
va_bea <- fread("data/payroll_cf_temp.csv") %>%
  as_tibble() %>%
  mutate(fips = str_pad(fips, width = 5, side = "left", pad = "0")) %>%
  mutate(fips_bea = fips) %>%
  left_join(va_cross) %>%
  mutate(fips = coalesce(fips_maps, fips)) %>%
  distinct(fips, industry, .keep_all = TRUE) %>%
  dplyr::select(-polyname, -fips_maps)

fwrite(va_bea, file = "data/payroll_cf.csv")

# Crosswalk for every code in the re-keyed payroll file: each distinct `fips`
# is looked up in the Virginia crosswalk as a map code, and codes without a
# match are their own BEA code.
full_cross <- fread("data/payroll_cf.csv") %>%
  distinct(fips) %>%
  mutate(fips = str_pad(fips, width = 5, side = "left", pad = "0")) %>%
  mutate(fips_maps = fips) %>%
  left_join(va_cross) %>%
  mutate(fips_bea = coalesce(fips_bea, fips)) %>%
  distinct(fips, fips_bea)

fwrite(full_cross, file = "data/mismatched-fips-crosswalk.csv")

# fips codes in trade costs
# First column of the 1997 trade-cost file, zero-padded to 5 characters and
# named `fips_bea`.
tau_cross <- fread("data/tradecosts/Tau1997.csv")[, 1] %>%
  as_tibble() %>%
  rename(fips_bea = V1) %>%
  mutate(fips_bea = str_pad(fips_bea, width = 5, side = "left", pad = "0"))

# fips codes in ap3
# Codes from the AP3 list, zero-padded to 5 characters, with "12025" recoded
# to "12086".
ap3_cross <- fread("raw/modified-ap3/fips.csv") %>%
  as_tibble() %>%
  rename(fips = V1) %>%
  mutate(fips = str_pad(fips, width = 5, side = "left", pad = "0")) %>%
  mutate(fips = ifelse(fips == "12025", "12086", fips))

# distinct fips to get rid of multi-bea crosses in the single fips list: 3022 counties
# Keep only codes that are present in the payroll crosswalk, the trade-cost
# file and the AP3 list, then reduce to one row per `fips`.
crosswalk <- full_cross %>%
  inner_join(tau_cross) %>%
  inner_join(ap3_cross) %>%
  mutate(across(everything(), as.character)) %>%
  mutate(fips = str_pad(fips, width = 5, side = "left", pad = "0")) %>%
  distinct(fips)

fwrite(crosswalk, file = "data/simulation_fips_crosswalk_temp.csv")


